# Simulate two independent standard-normal scores and plot them, comparing the
# linear fit over all points with the fit over only the "observed" subset.
set.seed(42) # fixed seed so the random draws below are reproducible
xy <- data.frame(
  x = rnorm(1000),
  y = rnorm(1000)
)

# A row counts as observed when |x + y| is below a per-row random threshold
# drawn uniformly from [0.5, 2.5].
xy[["obs"]] <- with(xy, abs(x + y) < 0.5 + runif(nrow(xy), 0, 2))

ggplot(data = xy, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(color = obs)) +
  # Linear fit restricted to the observed rows.
  # NOTE(review): "#00BFC4" looks chosen to match the default point colour of
  # the obs == TRUE group -- confirm against the rendered legend.
  geom_smooth(
    data = xy[xy[["obs"]], , drop = FALSE],
    method = "lm",
    se = FALSE,
    color = "#00BFC4"
  ) +
  # Linear fit over every row (the line the text annotation below refers to).
  geom_smooth(method = "lm", se = FALSE) +
  # time_series_ggstyle is defined outside this section; it is added to the
  # plot like a theme/style component.
  time_series_ggstyle +
  labs(
    title = "Restaurant and location quality",
    subtitle = "Survivor bias",
    x = "Location Quality",
    y = "Restaurant Quality",
    color = "Observed"
  ) +
  annotate(
    "text",
    label = "Population regression line",
    x = 2,
    y = -0.2,
    hjust = 0, # left-align the text at x = 2
    size = 8,
    color = "blue"
  ) +
  # NOTE(review): presumably restores an x-axis title that
  # time_series_ggstyle blanks out -- confirm against its definition.
  theme(axis.title.x = element_text())